################################################################################################################ # # # The COMPUTE module and the 3 modules that follow must be run before SNP survival estimation can be completed # # # ################################################################################################################ #The E_COMPUTE function creates the V matrix used to calculate survival E_COMPUTE <- function(R) { PDFvalue <- dexp(R) V <- matrix(nrow=n,ncol=5,0) #creates an empty matrix to be filled by survival values V[,1] <- 1-pexp(R) #recursive formula for I(0,r) V[,2] <- R * PDFvalue + V[,1] # I(1,r) V[,3] <- R^2 * PDFvalue + 2*V[,2] # I(2,r) V[,4] <- R^3 * PDFvalue + 3*V[,3] # I(3,r) V[,5] <- R^4 * PDFvalue + 4*V[,4] # I(4,r) return(V) } #E_module_0 computes the negative log likelihood for the k=0 case E_module_0 <- function(omega) { X <- as.matrix(snp/omega[1]) f <- (omega[1]^-1 * exp(-X)) #SNP density function for k=0 output <- E_COMPUTE(X) S<- output[,1] neglike <- -delta*log(f) - (1-delta)*log(S) #defines negative log likelihood SUM = sum(neglike) #summation of negative log likelihood return(SUM) #saves SUM values } E_module_1 <- function(omega) { X <- as.matrix(snp/omega[1]) a0 <- cos(omega[2])-sin(omega[2]) a1 <- sin(omega[2]) #defines a0 and a1 as functions of phi f <- (a0+a1*X)^2*(omega[1])^-1 * exp(-X) #SNP density function for k=1 OUT <- E_COMPUTE(X) S <- a0**2 * OUT[,1] + a1**2 * OUT[,3] + 2*a0*a1*OUT[,2] #survival function for k=1 neglike <- -delta*log(f) - (1-delta)*log(S) SUM = sum(neglike) return(SUM) } E_module_2 <- function(omega) { X <- as.matrix(snp/omega[1]) a0 <- (cos(omega[2])-sin(omega[2])*cos(omega[3])+sin(omega[2])*sin(omega[3])) a1 <- (sin(omega[2])*cos(omega[3])-2*sin(omega[2])*sin(omega[3])) a2 <- (.5*sin(omega[2])*sin(omega[3])) f <- ((omega[1]^-1)*(a0+a1*X+a2*X^2)^2 *exp(-X)) output<- E_COMPUTE(X) S<- (a0**2*output[,1] + a1^2*output[,3] + a2^2*output[,5] + 2*a0*a1*output[,2] + 2*a0*a2*output[,3] + 2*a1*a2*output[,4]) neglike <- -delta*log(f) - (1-delta)*log(S) SUM = sum(neglike) return(SUM) } set.seed(12345) n <- 200 #sets the number of observations in our simulated data set N <- 100 # GENERATING SURVIVAL TIMES IN THE "All_Data" MATRIX #################################################### EXPONENTIAL DATA #therate <- 2.5 #All_Survivals_vector <- rexp(N*n,therate) #################################################### WEIBULL DATA #theshape <- 4 #thescale <- 2 #All_Survivals_vector <- rweibull(N*n,theshape,thescale) #################################################### LOGNORMAL DATA All_Survivals_vector <- rlnorm(N*n, meanlog = 0, sdlog = 1) # GENERATING CENSORING TIMES IN THE "All_Deltas" MATRIX #################################################### CENSORING TIMES WHEN DATA IS EXPONENTIAL #All_Censorings_vector <- runif(N*n, min=0, max=2.) # 20% right-censoring for exp data # All_Censorings_vector <- runif(N*n, min=4, max=8.) # No censoring for exp data #################################################### CENSORING TIMES WHEN DATA IS WEIBULL #All_Censorings_vector <- runif(N*n, min=0, max=9.05) #################################################### CENSORING TIMES WHEN DATA IS LOGNORMAL All_Censorings_vector <- runif(N*n, min=0, max=9.8) # GENERATING "All_Data" which is the vector of times (either survival or censoring times) Nn <- N*n A <- 1*(All_Censorings_vector >= All_Survivals_vector) * All_Survivals_vector B <- 1*(All_Censorings_vector < All_Survivals_vector) * All_Censorings_vector All_Times_vector <- A + B junk1 <- cbind(All_Survivals_vector, All_Censorings_vector, All_Times_vector) junk1[1:20,] # GENERATING THE DELTA VECTOR All_Deltas_vector <- matrix(ncol=1, nrow=N*n,0) Nn <- N*n # Below is a new way of generating the vector of delta values that does NOT require a LOOP!!! All_Deltas_vector <- 1*(All_Censorings_vector > All_Survivals_vector) # CHECKING THE DELTA VECTOR TO SEE IF IT IS CORRECT junk2 <- cbind(All_Survivals_vector, All_Censorings_vector, All_Deltas_vector) junk2[1:20,] mean(All_Deltas_vector) # MAKING THE HUGE VECTOR OF TIMES INTO A MATIX AND # MAKING THE HUGE VECTOR OF 0/1 VALUES INTO A MATRIX All_Times <- matrix(All_Times_vector, ncol=n) All_Deltas <- matrix(All_Deltas_vector ,ncol=n) # GRAPHING STUFF max_value <- max(All_Survivals_vector) graphing_xvalues <- seq(0.001,max_value+4,.01) #vector of times from 0.01 to our max value, in increments of .01 ### BEFORE WE HAD THE 2 LINES BELOW TO GENERATE CENSORING TIMES BUT THIS IS WRONG!!! ### All_Deltas_vector <-rbinom(N*n,1,proportion) #creates delta vector of successes and failures ### All_Deltas <- matrix(All_Deltas_vector, ncol=n) Final_Results <- matrix(nrow=N, ncol=5, 9999) ######################### STARTING THE BIG LOOP ######################### STARTING THE BIG LOOP ######################### STARTING THE BIG LOOP for(j in 1:N) { snp <- All_Times[j,] delta <- All_Deltas[j,] newdata <- cbind(snp,delta) #combines the snp and delta vectors to create a data set with censoring newdataB <- subset(newdata,delta==1) #eliminates censored data xbar <- mean(newdataB[,1]) #calculates the mean of the uncensored data ######optimizing k=0 x_values <- seq(xbar-.5*xbar,xbar+.5*xbar,xbar/4) #x_values x_length_K0 <- length(x_values) K0_results <- matrix(nrow=x_length_K0, ncol=4,0) row <- 0 for (x in x_values) { row <- row+1 #print(x) out <- optim( x , E_module_0) #print(out$par) K0_results[row,1] <- out$par # storing out$par in the correct row and 1st 2 columns of the "K1_results" matrix } #K0_results #################### computing HQ for k=0 for (i in 1:x_length_K0) { omega <- K0_results[i,1] fopt <- E_module_0(omega) # computing "fopt" needed for HQ HQ <- 2*fopt + 2*1*log(log(n)) # computing HQ K0_results[i,4] <- HQ # put HQ in the 4th column of the i^th row of the "K1_results" matrix } # printing the "K1_results" matrix again - notice that the 4th column is filled in now #choosing the best estimates omega_K0 <- c( K0_results[which.min(K0_results[,4]),] , 0) ######optimizing k=1 phi_values <- seq(-1.5,1.5,.1) #phi_values phi_length_K1 <- length(phi_values) # setting up a matrix to store the K=1 omega values K1_results <- matrix(nrow=phi_length_K1, ncol=4,0) row <- 0 # setting the initial value of "row" to zero for (phi in phi_values) # this loop which goes from 1 to 31 and computes estimates for each set of starting values { row <- row+1 # increasing the "row" value by one each time the loop runs #print(phi) a0 <- cos(phi)-sin(phi) a1 <- sin(phi) EZ <- a0^2 + 6*a1^2 + 4*a0*a1 r <- xbar/EZ out <- optim(c(r,phi), E_module_1) #print(out$par) K1_results[row,1:2] <- out$par # storing out$par in the correct row and 1st 2 columns of the "K1_results" matrix } #K1_results # printing the "K1_results" matrix which contains all omega estimates #################### computing HQ for k=1 for (i in 1:phi_length_K1) # starting a loop that goes froom 1 to 31 { omega <- K1_results[i,1:2] # this says that omega is equal to the first 2 columns of the i^th row of the "K1_results" matrix fopt <- E_module_1(omega) # computing "fopt" needed for HQ HQ <- 2*fopt + 2*2*log(log(n)) # computing HQ K1_results[i,4] <- HQ # put HQ in the 4th column of the i^th row of the "K1_results" matrix } #K1_results # printing the "K1_results" matrix again - notice that the 4th column is filled in now #choosing the best estimates omega_K1 <- c( K1_results[which.min(K1_results[,4]),] , 1) ######optimizing k=2 phi1_values <- seq(-1.5,1.5,1) phi2_values <- seq(-1.5,1.5,1) phi_length_K2 <- (length(phi1_values)*length(phi2_values)) K2_results <- matrix(nrow=phi_length_K2, ncol=4,0) row <- 0 for (phi1 in phi1_values) { for (phi2 in phi2_values) { row <- row+1 a0 <- (cos(phi1)-sin(phi1)*cos(phi2)+sin(phi1)*sin(phi2)) a1 <- (sin(phi1)*cos(phi2)-2*sin(phi1)*sin(phi2)) a2 <- (.5*sin(phi1)*sin(phi2)) EZ <- a0**2 + 6*a1**2 + 120*a2**2 + 4*a0*a1 + 12*a0*a2 + 48*a1*a2 r <- xbar/EZ out <- optim(c(r,phi1,phi2), E_module_2) #print(out$par) K2_results[row,1:3] <- out$par } } #################### computing HQ for k=2 for(i in 1:phi_length_K2) { omega <- K2_results[i,1:3] fopt <- E_module_2(omega) HQ <- 2*fopt + 2*3*log(log(n)) K2_results[i,4] <- HQ } #K2_results #choosing best estimate for k=2 omega_K2 <- c( K2_results[which.min(K2_results[,4]),] , 2) # combining the best K=0, K=1, and K=2 estimates into one matrix all3 <- rbind(omega_K0, omega_K1, omega_K2) final_omega <- all3[ which.min (all3[,4]), ] Final_Results[j, ] <- final_omega } ######################### ENDING THE BIG LOOP ######################### ENDING THE BIG LOOP ######################### ENDING THE BIG LOOP Final_Results ######### PLOTTING CODE plot.new() par(mai=c(1,1,1,1)) box() #####################plot.window(xlim= c(0, max_value-1.5), ylim=c(0,1), xaxs="i", yaxs="i") #plot.window(xlim= c(0, 2.5), ylim=c(0,1), xaxs="i", yaxs="i") #EXPONENTIAL plot.window(xlim= c(0, 6), ylim=c(0,1), xaxs="i", yaxs="i") #LOGNORMAL axis(1) axis(2) title(main="SNP and KM Estimates", xlab="time", ylab="survival probability") # DEFINING THE 5 TIMES OF INTEREST quantiles <- c(.1,.3,.5,.7,.9) #times <- qexp(quantiles,rate=therate) #TIMES OF INTEREST FOR EXP DATA #times <- qweibull(quantiles,shape=theshape,scale=thescale) #TIMES OF INTEREST FOR WEIB DATA times <- qlnorm(quantiles, meanlog = 0, sdlog = 1) #TIMES OF INTEREST FOR LOGNORMAL DATA # note that 'qnorm(.975)' results in 1.96 times Selected_KM_survivals <- matrix(nrow=N,ncol=5) for (j in 1:N) { # KM code snp <- All_Times[j,] delta <- All_Deltas[j,] library(survival) #loads survival package needed to graph KM fit <- survfit(Surv(snp, delta) ~ 1) #calculates survival values based on KM lines(fit, conf.int=FALSE,col="grey60") #plots KM KMout <- approx(fit$time, fit$surv, times, method="constant" , f=0, yleft=1, yright=0) Selected_KM_survivals[j,] <- KMout$y } Selected_KM_survivals # SETTING UP MATRICES NEEDED FOR GRAPHING LOOP Xlength <- length(graphing_xvalues) All_SNP_survivals <- matrix(nrow=N, ncol=Xlength) Selected_SNP_survivals <- matrix(nrow=N,ncol=5) for(j in 1:N) { X <- graphing_xvalues/Final_Results[j,1] X2 <- times/Final_Results[j,1] if (Final_Results[j,5] == 0) { S <- 1-pexp(X) lines(graphing_xvalues,S,col="blue") All_SNP_survivals[j,] <- S # fills in SNP survivals at all of the many times Selected_SNP_survivals[j,] <- 1-pexp(X2) # fills in SNP survivals for 5 times } else if (Final_Results[j,5] == 1) { PDFvalue <- dexp(X) m <- length(X) V <- matrix(nrow=m,ncol=3,0) #creates a smaller V matrix V[,1] <- 1-pexp(X) V[,2] <- X * PDFvalue + V[,1] V[,3] <- X^2*PDFvalue + 2*V[,2] a0 <- cos( Final_Results[j,2] )-sin( Final_Results[j,2] ) a1 <- sin( Final_Results[j,2] ) S <- a0 **2 * V[,1] + a1 **2 * V[,3] + 2*a0*a1*V[,2] #from survival equation lines(graphing_xvalues,S,col="green") All_SNP_survivals[j,] <- S # fills in SNP survivals at all of the many times # UPDATE WHAT GOES HERE PDFvalue2 <- dexp(X2) m2 <-length(X2) V2 <- matrix(nrow=m2,ncol=3,0) V2[,1] <- 1-pexp(X2) V2[,2] <- X2 * PDFvalue2 + V2[,1] V2[,3] <- (X2)^2*PDFvalue2 + 2*V2[,2] ###a02 <- cos( Final_Results[j,2] )-sin( Final_Results[j,2] ) ###a12 <-sin( Final_Results[j,2] ) S2 <- a0 **2 * V2[,1] + a1 **2 * V2[,3] + 2*a0*a1*V2[,2] Selected_SNP_survivals[j,] <- S2 # fills in SNP survivals for 5 times } else { PDFvalue <- dexp(X) m <- length(X) V <- matrix(nrow=m,ncol=5,0) V[,1] <- 1-pexp(X) #recursive formula for I(0,r) V[,2] <- X * PDFvalue + V[,1] # I(1,r) V[,3] <- X^2 * PDFvalue + 2*V[,2] # I(2,r) V[,4] <- X^3 * PDFvalue + 3*V[,3] # I(3,r) V[,5] <- X^4 * PDFvalue + 4*V[,4] # I(4,r) a0 <- (cos( Final_Results[j,2] )-sin( Final_Results[j,2] )*cos( Final_Results[j,3] )+sin( Final_Results[j,2] )*sin( Final_Results[j,3] )) a1 <- (sin( Final_Results[j,2] )*cos( Final_Results[j,3] )-2*sin( Final_Results[j,2] )*sin(Final_Results[j,3] )) a2 <- (.5*sin( Final_Results[j,2] )*sin( Final_Results[j,3] )) S <- (a0**2*V[,1] + a1^2*V[,3] + a2^2*V[,5] + 2*a0*a1*V[,2] + 2*a0*a2*V[,3] + 2*a1*a2*V[,4]) lines(graphing_xvalues,S,col="red") All_SNP_survivals[j,] <- S # fills in SNP survivals at all of the many times # UPDATE WHAT GOES HERE PDFvalue2 <- dexp(X2) m2 <- length(X2) V2 <- matrix(nrow=m2,ncol=5,0) V2[,1] <- 1-pexp(X2) #recursive formula for I(0,r) V2[,2] <- X2 * PDFvalue2 + V2[,1] # I(1,r) V2[,3] <- (X2)^2 * PDFvalue2 + 2*V2[,2] # I(2,r) V2[,4] <- (X2)^3 * PDFvalue2 + 3*V2[,3] # I(3,r) V2[,5] <- (X2)^4 * PDFvalue2 + 4*V2[,4] # I(4,r) ###a02 <- (cos( Final_Results[j,2] )-sin( Final_Results[j,2] )*cos( Final_Results[j,3] )+sin( Final_Results[j,2] )*sin( Final_Results[j,3] )) ###a12 <- (sin( Final_Results[j,2] )*cos( Final_Results[j,3] )-2*sin( Final_Results[j,2] )*sin(Final_Results[j,3] )) ###a22 <- (.5*sin( Final_Results[j,2] )*sin( Final_Results[j,3] )) S2 <- (a0**2*V2[,1] + a1^2*V2[,3] + a2^2*V2[,5] + 2*a0*a1*V2[,2] + 2*a0*a2*V2[,3] + 2*a1*a2*V2[,4]) Selected_SNP_survivals[j,] <- S2 # fills in SNP survivals for 5 times } } Selected_SNP_survivals # YOU WILL KNOW IF YOU GET THE RIGHT THING AFTER PRINTING "Avg_at_5times" Avg_SNPsurv_at_5times <- colMeans(Selected_SNP_survivals,na.rm=FALSE) Avg_SNPsurv_at_5times Avg_KMsurv_at_5times <- colMeans(Selected_KM_survivals,na.rm=FALSE) Avg_KMsurv_at_5times true_surv <- c(.9,.7,.5,.3,.1) SNPbias <- (Avg_SNPsurv_at_5times - true_surv) SNPbias KMbias <- (Avg_KMsurv_at_5times - true_surv) KMbias SNPvariance <- apply(Selected_SNP_survivals, 2, var) SNPvariance KMvariance <- apply(Selected_KM_survivals, 2, var) KMvariance SNP_MSE <- SNPvariance + SNPbias^2 SNP_MSE KM_MSE <- KMvariance + KMbias^2 KM_MSE RE <- KM_MSE/SNP_MSE RE # graphing SNP average SNP_average <- colMeans(All_SNP_survivals,na.rm=FALSE) lines(graphing_xvalues,SNP_average,lwd=8,col="orange") # graphing the true survival #truth <- exp(-therate*graphing_xvalues) # EXPONENTIAL TRUE SURVIVAL #truth <- exp(- (graphing_xvalues/thescale)^theshape) # WEIBULL TRUE SURVIVAL truth <- 1-plnorm(graphing_xvalues, meanlog = 0, sdlog = 1, lower.tail = TRUE, log.p = FALSE) # LOGNORMAL TRUE SURVIVAL lines(graphing_xvalues,truth,lwd=6,col="black")